library(here)
library(haven)
library(tidyverse)
library(zoo)
library(DT)
library(zipcode)
library(tidytext)
library(igraph)
library(ggraph)
#' Look up the state for a single ZIP code.
#'
#' @param ZC A single ZIP code (or `NA`). It is compared against
#'   `zipcode.dataset$zip` using R's usual type coercion.
#' @param zipcode.dataset A data frame with `zip` and `state` columns.
#'   Defaults to the `zipcode` object, which must exist by the time the
#'   function is called.
#' @return The `state` value of the first row whose `zip` equals `ZC`, or `NA`
#'   when `ZC` is missing, zero-length, or absent from the lookup table.
identify_state <- function(ZC, zipcode.dataset = zipcode) {
  if (length(ZC) > 1L) {
    stop("`ZC` must be a single ZIP code; map over vectors instead.",
         call. = FALSE)
  }
  # Zero-length or missing input has no state to look up.
  if (length(ZC) == 0L || is.na(ZC)) {
    return(NA)
  }
  # match() yields only the first hit, so duplicated ZIP rows in the lookup
  # table cannot turn the result into a multi-element vector.
  loc <- match(ZC, zipcode.dataset$zip)
  if (is.na(loc)) {
    return(NA)
  }
  zipcode.dataset$state[loc]
}
# Make the `zipcode` lookup table available; identify_state() uses it as the
# default value of its `zipcode.dataset` argument.
data(zipcode)

# Read the SPSS master survey file; here() resolves the path.
master <-
  here("../../Data Management R3/CC_Clean Survey Data/00_R3 MasterFile/MasterFile_groupings.sav") %>%
  read_sav()

# Build the long table of open-ended responses: one row per non-empty answer,
# next to Week and the respondent characteristics.
response_table <- master %>%
  arrange(Week) %>%
  # Pass every DEMO.001 value to identify_state(); map() yields a list column.
  mutate(State = map(DEMO.001, identify_state)) %>%
  select(
    CaregiverID, Week, starts_with("OPEN"),
    State, FPL.150, RaceGroup, CaregiverAge
  ) %>%
  group_by(CaregiverID) %>%
  # Labelled (haven) columns become factors; the grouping column is skipped.
  mutate_if(is.labelled, as_factor, levels = "labels") %>%
  # Within each CaregiverID, carry the last observed value forward over NAs
  # (rows are already ordered by Week).
  mutate_at(c("State", "FPL.150", "RaceGroup", "CaregiverAge"), na.locf0) %>%
  ungroup() %>%
  # OPEN.006 is renamed before reshaping, so it stays a column of its own
  # instead of being gathered with the other OPEN questions.
  rename(
    `Below 1.5xFPL` = FPL.150,
    canuse = OPEN.006
  ) %>%
  select(-CaregiverID) %>%
  gather(key = "Question", value = "Response", starts_with("OPEN")) %>%
  filter(Response != "")
## `mutate_if()` ignored the following grouping variables:
## Column `CaregiverID`
## Warning: attributes are not identical across measure variables;
## they will be dropped

Question: What are the biggest challenges and concerns for you and your family right now?

Responses

# Interactive table of the responses to the question selected by `params$num`.
# NOTE(review): grepl() treats `params$num` as a regular expression matched
# anywhere in the column name, so e.g. "1" would also match a name such as
# "OPEN.010" -- confirm the set of OPEN columns makes this unambiguous.
response_table %>%
  filter(grepl(params$num, Question)) %>%
  select(-Question) %>%
  datatable(
    filter = "top",
    # TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
    # reassigned, whereas TRUE and FALSE are reserved words.
    rownames = FALSE,
    options = list(autoWidth = TRUE)
  )
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Bi-gram network

# Split the answers from rows with UserLanguage "EN" into two-word sequences
# (bigrams) for the question selected by `params$num`.
bigrams <- master %>%
  filter(UserLanguage == "EN") %>%
  select(CaregiverID, starts_with("OPEN")) %>%
  gather(key = "Question", value = "Response", starts_with("OPEN")) %>%
  # Keep non-empty answers whose column name matches `params$num`.
  filter(Response != "", grepl(params$num, Question)) %>%
  unnest_tokens(output = bigram, input = Response, token = "ngrams", n = 2)
## Warning: attributes are not identical across measure variables;
## they will be dropped
  # Split each bigram into its two words, then discard pairs that contain a
  # stop word or a missing word.
  bigrams <- bigrams %>%
    separate(col = bigram, into = c("word1", "word2"), sep = " ") %>%
    filter(
      !word1 %in% stop_words$word,
      !word2 %in% stop_words$word,
      !is.na(word1),
      !is.na(word2)
    )

  # Tally every remaining word pair, most frequent first.
  bigram_counts <- count(bigrams, word1, word2, sort = TRUE)
  
  # Minimum bigram count threshold: 20 for questions "1" and "2", 5 otherwise.
  # `params$num` is assumed to be a single value, so a plain if/else is used
  # instead of the vectorised ifelse().
  filter_num <- if (params$num %in% c("1", "2")) 20 else 5

  # Keep only the bigrams counted at least `filter_num` times and build a graph
  # from them; the first two columns (word1, word2) define the edges.
  bigram_graph <- graph_from_data_frame(
    filter(bigram_counts, n >= filter_num)
  )

  # Draw the network using the "fr" layout, labelling each node with its word.
  bigram_graph %>%
    ggraph(layout = "fr") +
    geom_edge_link(alpha = .3) +
    geom_node_point(alpha = .3) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1, size = 2)